In this R Notebook I implemented an Convolutional Neural Network (CNN) using the MNIST Database for handwritten digits recognition using MXNet framework for R.
You will need to install the MXNet for R and, if you intent to use your GPU card, the NVidia CUDA Drivers.
Download all four dataset files from MNIST site and gunzip them in the project directory.
Finally, load the libraries.
library(mxnet) # ann framework
library(magrittr) # to use modeling the framework
library(caret) # to use to check the performace
We’ll use an adaptation of gist from Brendan o’Connor to read the files transforming them in a structure simple to use and access.
# read function returns a list of datasets
load_mnist <- function() {
load_image_file <- function(filename) {
ret = list()
f = file(filename,'rb')
readBin(f,'integer',n=1,size=4,endian='big')
ret$n = readBin(f,'integer',n=1,size=4,endian='big')
nrow = readBin(f,'integer',n=1,size=4,endian='big')
ncol = readBin(f,'integer',n=1,size=4,endian='big')
x = readBin(f,'integer',n=ret$n*nrow*ncol,size=1,signed=F)
ret$x = matrix(x, ncol=nrow*ncol, byrow=T)
close(f)
ret
}
load_label_file <- function(filename) {
f = file(filename,'rb')
readBin(f,'integer',n=1,size=4,endian='big')
n = readBin(f,'integer',n=1,size=4,endian='big')
y = readBin(f,'integer',n=n,size=1,signed=F)
close(f)
y
}
train <- load_image_file('./data/train-images-idx3-ubyte')
test <- load_image_file('./data/t10k-images-idx3-ubyte')
train$y <- load_label_file('./data/train-labels-idx1-ubyte')
test$y <- load_label_file('./data/t10k-labels-idx1-ubyte')
return(
list(
train = train,
test = test
)
)
}
# plot one case
show_digit <- function(arr784, col=gray(12:1/12), ...) {
image(matrix(arr784, nrow=28)[,28:1], col=col, ...)
}
# load
mnist <- load_mnist()
Let’s check the dataset
labels <- paste(mnist$train$y[1:5],collapse = ", ")
par(mfrow=c(1,5), mar=c(0.1,0.1,0.1,0.1))
for(i in 1:5) show_digit(mnist$train$x[i,])
Labels: 5, 0, 4, 1, 9
LeNet CNN Architecture
# pipe assign function
# example: rnorm(5,mean=5) %>% sqrt() %=>% "varname" %>% mean()
"%=>%" <- function(val,var) {
assign(substitute(var),val, envir = .GlobalEnv)
return(val)
}
# input data
lenet <- mx.symbol.Variable("data") %>%
# Convolutional Layer Set 1 ( Conv > Tanh > Pool )
mx.symbol.Convolution( kernel=c(5,5), num_filter=20, name="Conv1" ) %=>% "Conv1" %>%
mx.symbol.Activation( act_type="tanh", name="Act1" ) %=>% "Act1" %>%
mx.symbol.Pooling( pool_type="max", kernel=c(2,2),
stride=c(2,2), name = "Pool1") %=>% "Pool1" %>%
# Convolutional Layer Set 1 ( Conv > Tanh > Pool )
mx.symbol.Convolution( kernel=c(5,5), num_filter=50 , name="Conv2") %=>% "Conv2" %>%
mx.symbol.Activation( act_type="tanh", name="Act2" ) %=>% "Act2" %>%
mx.symbol.Pooling( pool_type="max", kernel=c(2,2),
stride=c(2,2), name = "Pool2") %=>% "Pool2" %>%
# Flat representation 50 2D filters -> 1D Array
mx.symbol.flatten( name="Flat") %=>% "Flat1" %>%
# Fully Connected Layer 1
mx.symbol.FullyConnected( num_hidden=500, name="Full1" ) %=>% "Full1" %>%
mx.symbol.Activation( act_type="tanh", name="Act3" ) %=>% "Act3" %>%
# Fully Connected Layer 1
mx.symbol.FullyConnected( num_hidden=10 , name="Full2") %=>% "Full2" %>%
mx.symbol.SoftmaxOutput(name="SoftM") %=>% "SoftM"
Checking the model built
graph.viz( lenet, direction = "LR" )
# Resizing the dataset from 10000 x 784 to (28 x 28) x 1 x 100000
# train
tr.x <- t(mnist$train$x)
dim(tr.x) <- c(28,28,1,ncol(tr.x))
# test
ts.x <- t(mnist$test$x)
dim(ts.x) <- c(28,28,1,ncol(ts.x))
# training
logger.epoc <- mx.callback.log.train.metric(100)
logger.batch <- mx.metric.logger$new()
mx.set.seed(42) # the life, the universe and everything
ti <- proc.time()
model <- mx.model.FeedForward.create(lenet,
X=tr.x,
y=mnist$train$y,
eval.data=list(
data=ts.x,
label=mnist$test$y),
ctx=mx.cpu(),
num.round=20,
array.batch.size=100,
learning.rate=0.05,
momentum=0.9,
wd=0.00001,
eval.metric=mx.metric.accuracy,
epoch.end.callback=logger.epoc,
batch.end.callback=mx.callback.log.train.metric(1, logger.batch))
te <- proc.time()
print(te-ti)
## user system elapsed
## 3186.33 1775.99 1726.14
mx.model.save(model, "mnistModel",1)
plot(logger.batch$train, main = "Train Evolution", ylab="Accuracy", xlab="Batchs")
# process the validation dataset
outputs <- predict(model,ts.x)
# the output is a 10 x 10000 matrix
# transpose to transform in a tidy dataset ( cases x result )
t_outputs <- t(outputs)
# the last layer is a softmax agregator
# so, each column of the dataset is de probability of a value from 0 to 9
# lets get the biggest probability for each test case
y_hat <- max.col(t_outputs)-1 # base index is 1
cm <- confusionMatrix(y_hat,mnist$test$y)
cm$table
## Reference
## Prediction 0 1 2 3 4 5 6 7 8 9
## 0 974 0 1 0 0 2 3 0 2 0
## 1 1 1131 0 0 0 0 2 2 0 0
## 2 0 0 1025 1 1 0 0 2 0 0
## 3 0 0 0 1000 0 5 1 1 1 2
## 4 0 0 1 0 970 0 0 1 0 6
## 5 1 0 0 6 0 878 4 0 2 2
## 6 3 2 1 0 1 2 947 0 0 1
## 7 1 0 2 0 0 0 0 1016 0 3
## 8 0 2 2 3 0 4 1 1 966 1
## 9 0 0 0 0 10 1 0 5 3 994
cm$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.9901000 0.9889958 0.9879601 0.9919467 0.1135000
## AccuracyPValue McnemarPValue
## 0.0000000 NaN
# found where the network most fail
errors <- cm$table
diag(errors) <- 0
worst <- which( errors==max(errors), arr.ind = T) - 1
worst
## Prediction Reference
## 9 9 4
worst.idx <- mnist$test$y == worst[1,"Reference"] & y_hat == worst[1,"Prediction"]
worstcases <- mnist$test$x[worst.idx,]
par(mfrow=c(2,5), mar=c(0.1,0.1,0.1,0.1))
for(i in 1:10) show_digit(worstcases[i,])
Indeed some cases are remarkable difficult to identify as five, like the 2nd, 3rd, 6th, 9th and 10th.
wpred <- t_outputs[ worst.idx, ]
par(mfrow=c(2,5), mar=c(0.1,0.1,0.1,0.1))
for(i in 1:10) barplot(wpred[i,])
# use the layer's references to build a group symbol
# create an executor to controls the network
out <- mx.symbol.Group(c(Conv1, Act1, Pool1, Conv2, Act2, Pool2, Flat1, Full1, Full2, SoftM))
executor <- mx.simple.bind(symbol = out, data=dim(ts.x), ctx=mx.cpu())
# transfer the arguments and parameters learned
mx.exec.update.arg.arrays(executor, model$arg.params, match.name = T)
mx.exec.update.aux.arrays(executor, model$aux.params, match.name = T)
# prepare the input
mx.exec.update.arg.arrays(executor, list(data=mx.nd.array(ts.x)), match.name=TRUE)
# Feedforward: propagates the input to output throught the network
mx.exec.forward(executor, is.train=FALSE)
# list the output elements
names(executor$ref.outputs)
## [1] "Conv1_output" "Act1_output" "Pool1_output" "Conv2_output"
## [5] "Act2_output" "Pool2_output" "Flat_output" "Full1_output"
## [9] "Full2_output" "SoftM_output"
# choosing the first worst case
j <- which( worst.idx==T )[1]
# Conv1 Filters
par(mfrow=c(4,5), mar=c(0.1,0.1,0.1,0.1))
for (i in 1:20) {
outputData <- as.array(executor$ref.outputs$Conv1_output)[,,i,j]
image(outputData[,24:1],
xaxt='n', yaxt='n')
}
par(mfrow=c(4,5), mar=c(0.1,0.1,0.1,0.1))
for (i in 1:20) {
outputData <- as.array(executor$ref.outputs$Act1_output)[,,i,j]
image(outputData[,24:1],
xaxt='n', yaxt='n')
}
par(mfrow=c(4,5), mar=c(0.1,0.1,0.1,0.1))
for (i in 1:20) {
outputData <- as.array(executor$ref.outputs$Pool1_output)[,,i,j]
image(outputData[,12:1],
xaxt='n', yaxt='n')
}
par(mfrow=c(7,7), mar=c(0.1,0.1,0.1,0.1))
for (i in 1:49) {
outputData <- as.array(executor$ref.outputs$Conv2_output)[,,i,j]
image(outputData[,8:1],
xaxt='n', yaxt='n')
}
par(mfrow=c(7,7), mar=c(0.1,0.1,0.1,0.1))
for (i in 1:49) {
outputData <- as.array(executor$ref.outputs$Act2_output)[,,i,j]
image(outputData[,8:1],
xaxt='n', yaxt='n')
}
par(mfrow=c(7,7), mar=c(0.1,0.1,0.1,0.1))
for (i in 1:49) {
outputData <- as.array(executor$ref.outputs$Pool2_output)[,,i,j]
image(outputData[,4:1],
xaxt='n', yaxt='n')
}